{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
   "metadata": {},
   "source": [
    "# Langchain Output Parsing"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
   "metadata": {},
   "source": [
    "#### Load documents, build the VectorStoreIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
    "\n",
    "from llama_index import VectorStoreIndex, SimpleDirectoryReader\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "03d1691e-544b-454f-825b-5ee12f7faa8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load documents\n",
    "documents = SimpleDirectoryReader(\"../paul_graham_essay/data\").load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens\n",
      "> [build_index_from_documents] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 18579 tokens\n",
      "> [build_index_from_documents] Total embedding token usage: 18579 tokens\n"
     ]
    }
   ],
   "source": [
    "index = VectorStoreIndex.from_documents(documents, chunk_size=512)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8b7d7c61-b5d7-4b8f-b90b-3ebee1103f27",
   "metadata": {},
   "source": [
    "#### Define Query + Langchain Output Parser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6fb88295-0840-4e2d-b79b-def0b0a63a7f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.output_parsers import LangchainOutputParser\n",
    "from llama_index.llm_predictor import StructuredLLMPredictor\n",
    "from langchain.output_parsers import StructuredOutputParser, ResponseSchema"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "057139d2-09e8-4b8d-83a1-a2356a1475a8",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "llm_predictor = StructuredLLMPredictor()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bc25edf7-9343-4e82-a3f1-eec4281a9371",
   "metadata": {},
   "source": [
    "**Define custom QA and Refine Prompts**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2833d086-d240-4798-b3c5-a83ac4593b0e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.prompts.prompts import QuestionAnswerPrompt, RefinePrompt\n",
    "from llama_index.prompts.default_prompts import (\n",
    "    DEFAULT_TEXT_QA_PROMPT_TMPL,\n",
    "    DEFAULT_REFINE_PROMPT_TMPL,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a4b9201d-fe16-4cc0-8135-a08d9928625d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "response_schemas = [\n",
    "    ResponseSchema(\n",
    "        name=\"Education\",\n",
    "        description=\"Describes the author's educational experience/background.\",\n",
    "    ),\n",
    "    ResponseSchema(\n",
    "        name=\"Work\", description=\"Describes the author's work experience/background.\"\n",
    "    ),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e73b87b8-90da-4ab8-9ff7-e40880277d9b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "lc_output_parser = StructuredOutputParser.from_response_schemas(response_schemas)\n",
    "output_parser = LangchainOutputParser(lc_output_parser)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "a9b440d4-6fb4-46e6-973f-44207b432d3f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# NOTE: we use the same output parser for both prompts, though you can choose to use different parsers\n",
    "# NOTE: here we add formatting instructions to the prompts.\n",
    "\n",
    "fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)\n",
    "fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL)\n",
    "\n",
    "qa_prompt = QuestionAnswerPrompt(fmt_qa_tmpl, output_parser=output_parser)\n",
    "refine_prompt = RefinePrompt(fmt_refine_tmpl, output_parser=output_parser)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1ba18a80-35f4-4fd4-9b13-9f13f84db4fe",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Context information is below. \n",
      "---------------------\n",
      "{context_str}\n",
      "---------------------\n",
      "Given the context information and not prior knowledge, answer the question: {query_str}\n",
      "\n",
      "\n",
      "The output should be a markdown code snippet formatted in the following schema:\n",
      "\n",
      "```json\n",
      "{{\n",
      "\t\"Education\": string  // Describes the author's educational experience/background.\n",
      "\t\"Work\": string  // Describes the author's work experience/background.\n",
      "}}\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "# take a look at the new QA template!\n",
    "print(fmt_qa_tmpl)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
   "metadata": {},
   "source": [
    "#### Query Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "fb9cdf43-0f31-4c36-869b-df9fa50aebdb",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 609 tokens\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> [query] Total LLM token usage: 609 tokens\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 11 tokens\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> [query] Total embedding token usage: 11 tokens\n"
     ]
    }
   ],
   "source": [
    "query_engine = index.as_query_engine(\n",
    "    text_qa_template=qa_prompt,\n",
    "    refine_template=refine_prompt,\n",
    "    llm_predictor=llm_predictor,\n",
    ")\n",
    "response = query_engine.query(\n",
    "    \"What are a few things the author did growing up?\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "bc7760b6-5be3-4303-b97e-3f5edacf674b",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Education': 'Before college, the author wrote short stories and experimented with programming on an IBM 1401.', 'Work': 'The author worked on writing and programming outside of school.'}\n"
     ]
    }
   ],
   "source": [
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "002a4b5f-51ac-437a-afe7-94e2687737a9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama_index",
   "language": "python",
   "name": "llama_index"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
